14b32da8391f6cc766e4ff3fcde16503d137ae8a,src/test/java/org/voyanttools/trombone/input/extract/TikaExtractorTest.java,TikaExtractorTest,testStrings,#,50
Before Change
// Resolve the stored document source for the current inputSource (assigned
// before the visible lines), then build an extractor from the storage and the
// current parameters and run the extraction.
storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage, parameters);
extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
// Read the extracted content back from storage by id. The stream is created
// inline and is not explicitly closed anywhere in this snippet.
contents = IOUtils.toString(storeDocumentSourceStorage.getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
// The inline <b> markup must still be present in the extracted text.
assertTrue("XML-declared string should contain tags", contents.contains("<b>a</b>"));
}
After Change
// Each case below follows the same steps: store the input, extract it, read the
// extracted content back through an explicitly held stream, close the stream,
// and assert that the inline <b> markup is still present.
// NOTE(review): close() is called after IOUtils.toString() with no visible
// try/finally, so the stream would stay open if the read throws — confirm
// whether an enclosing block (not visible here) covers that.

// Case 1: uses the inputSource and extractor set up before the visible lines.
storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
inputStream = storeDocumentSourceStorage.getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId());
contents = IOUtils.toString(inputStream);
inputStream.close();
// contents = IOUtils.toString(storeDocumentSourceStorage.getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
assertTrue("HTML string should contain tags", contents.contains("<b>a</b>"));
// Case 2: a full HTML document whose body wraps the text in <section> and <div>.
inputSource = new StringInputSource("<html><body><section><div>This is <b>a</b> test.</div></section></body></html>");
storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
inputStream = storeDocumentSourceStorage.getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId());
contents = IOUtils.toString(inputStream);
inputStream.close();
// contents = IOUtils.toString(storeDocumentSourceStorage.getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
assertTrue("HTML string should contain tags", contents.contains("<b>a</b>"));
// TODO: find a way to keep html5 tags with xhtml transformer assertTrue("HTML string should contain HTML5 tags", contents.contains("<section>"));
// Case 3: a string with a custom <test> root element, extracted with the same
// extractor instance as the cases above (no inputFormat is set in the visible code).
inputSource = new StringInputSource("<test>This is <b>a</b> test.</test>");
storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
inputStream = storeDocumentSourceStorage.getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId());
contents = IOUtils.toString(inputStream);
inputStream.close();
// contents = IOUtils.toString(storeDocumentSourceStorage.getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
assertTrue("XML-looking string should contain tags", contents.contains("<b>a</b>"));
// Case 4: the same inputSource as case 3, but with inputFormat explicitly set to
// "XML". The extractor is rebuilt after the parameter change — presumably so the
// new setting takes effect; confirm against StoredDocumentSourceExtractor.
parameters.setParameter("inputFormat", "XML");
storedDocumentSource = storeDocumentSourceStorage.getStoredDocumentSource(inputSource);
extractor = new StoredDocumentSourceExtractor(storeDocumentSourceStorage, parameters);
extractedStoredDocumentSource = extractor.getExtractedStoredDocumentSource(storedDocumentSource);
inputStream = storeDocumentSourceStorage.getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId());
contents = IOUtils.toString(inputStream);
inputStream.close();
// contents = IOUtils.toString(storeDocumentSourceStorage.getStoredDocumentSourceInputStream(extractedStoredDocumentSource.getId()));
assertTrue("XML-declared string should contain tags", contents.contains("<b>a</b>"));